More Project Visualizations

Here are some sample plots using the voting data. Any thoughts about what this tells us for analysis?

Setup

Note the setup variables below. These are supposed to be controls all the plots. Sometimes they are. Sometimes not. Have to clean this up, but the goal is uniformity across all the plots.

For fonts, I tried to use ‘sans’ which should pick the system sans serif font for whatever OS is running. In the plotly plot I had to pick a specific font or it defaults to Times New Roman. I picked Arial, but we should look at including Helvetica for macOS users.

# --- Load libraries ---
library(ggplot2)
library(ggforce)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(knitr)
library(readr)
library(sf)
Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(tigris)
To enable caching of data, set `options(tigris_use_cache = TRUE)`
in your R script or .Rprofile.
library(plotly)

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
# --- Global color palette (Civic Triangle style) ---
fill_col   <- "#ffffff"   # white background
line_col   <- "#3a5f7d"   # blue-grey for roads and outlines
text_col   <- "#2f3b44"   # text and titles
alt_line   <- "#536cae"   # secondary line color
alt_text   <- "#536cae"   # secondary text color
border_col <- "#e6eef5"   # light blue-grey for county borders
Figure 7: Mean Years of Schooling by County in Texas (2020)
# --- Export static version if needed ---
ggsave("mean_years_choropleth.png", plot = p_mean_years,
       width = 9, height = 8, dpi = 300, units = "in", limitsize = FALSE)

Figure 7

library(ggplot2)
library(dplyr)

# --- Prepare data ---
# Assume `turnout` already contains Turnout_Rate and edu_index
# Sort counties by turnout descending
bar_data <- turnout %>%
  arrange(desc(Turnout_Rate)) %>%
  mutate(County = factor(County, levels = rev(County)))  # top = highest turnout

# --- Create the “double bar” style plot ---
p_double <- ggplot(bar_data) +
  # Left side: voter participation
  geom_bar(aes(x = -Turnout_Rate, y = County),
           stat = "identity", fill = line_col, alpha = 0.8, width = 0.8) +
  # Right side: education index
  geom_bar(aes(x = edu_index * 100, y = County),
           stat = "identity", fill = alt_line, alpha = 0.8, width = 0.8) +
  # Center line
  geom_vline(xintercept = 0, color = border_col, linewidth = 0.8) +
  # Axis and labels
  scale_x_continuous(
    name = NULL,
    limits = c(-max(bar_data$Turnout_Rate, na.rm = TRUE),
                max(bar_data$edu_index, na.rm = TRUE) * 100),
    breaks = seq(-100, 100, by = 25),
    labels = function(x) abs(x)
  ) +
  labs(
    y = NULL,
    title = "Voting Participation vs. Education Index by County (Texas, 2020)",
    subtitle = "Counties sorted by Voter Participation (Highest → Lowest)"
  ) +
  theme_minimal(base_family = "sans") +
  theme(
    plot.background = element_rect(fill = fill_col, color = NA),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(color = text_col, size = 10),
    axis.title.x = element_text(color = text_col, face = "bold"),
    plot.title = element_text(color = text_col, face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(color = text_col, size = 12, hjust = 0.5)
  ) +
  annotate("text", x = -90, y = nrow(bar_data) + 2,
           label = "Voter Turnout (%)", color = line_col,
           family = "sans", fontface = "bold", size = 4.2) +
  annotate("text", x = 90, y = nrow(bar_data) + 2,
           label = "Education Index (0–1 → %)", color = alt_text,
           family = "sans", fontface = "bold", size = 4.2)

# --- Display inline ---
p_double
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_text()`).
Removed 1 row containing missing values or values outside the scale range
(`geom_text()`).
# --- Export PNG for publication use ---
ggsave("double_bar_voting_education.png", plot = p_double,
       width = 10, height = 10, dpi = 300, units = "in", limitsize = FALSE)
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_text()`).
Removed 1 row containing missing values or values outside the scale range
(`geom_text()`).
Figure 8: County-level comparison of Voter Participation and Education Index (2020)

Figure 8

library(ggplot2)
library(dplyr)

# --- Prepare data ---
# Sort counties by education index descending
bar_data_edu <- turnout %>%
  arrange(desc(edu_index)) %>%
  mutate(County = factor(County, levels = rev(County)))  # top = highest education

# --- Create the mirrored “violin-style” double bar chart ---
p_double_edu <- ggplot(bar_data_edu) +
  # Left side: voter participation
  geom_bar(aes(x = -Turnout_Rate, y = County),
           stat = "identity", fill = line_col, alpha = 0.8, width = 0.8) +
  # Right side: education index
  geom_bar(aes(x = edu_index * 100, y = County),
           stat = "identity", fill = alt_line, alpha = 0.8, width = 0.8) +
  # Center line
  geom_vline(xintercept = 0, color = border_col, linewidth = 0.8) +
  # Axes and scales
  scale_x_continuous(
    name = NULL,
    limits = c(-max(bar_data_edu$Turnout_Rate, na.rm = TRUE),
                max(bar_data_edu$edu_index, na.rm = TRUE) * 100),
    breaks = seq(-100, 100, by = 25),
    labels = function(x) abs(x)
  ) +
  labs(
    y = NULL,
    title = "Education Index vs. Voter Participation by County (Texas, 2020)",
    subtitle = "Counties sorted by Education Index (Highest → Lowest)"
  ) +
  theme_minimal(base_family = "sans") +
  theme(
    plot.background = element_rect(fill = fill_col, color = NA),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(color = text_col, size = 10),
    axis.title.x = element_text(color = text_col, face = "bold"),
    plot.title = element_text(color = text_col, face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(color = text_col, size = 12, hjust = 0.5)
  ) +
  annotate("text", x = -90, y = nrow(bar_data_edu) + 2,
           label = "Voter Turnout (%)", color = line_col,
           family = "sans", fontface = "bold", size = 4.2) +
  annotate("text", x = 90, y = nrow(bar_data_edu) + 2,
           label = "Education Index (0–1 → %)", color = alt_text,
           family = "sans", fontface = "bold", size = 4.2)

# --- Display inline ---
p_double_edu
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_text()`).
Removed 1 row containing missing values or values outside the scale range
(`geom_text()`).
# --- Export PNG for publication use ---
ggsave("double_bar_education_voting.png", plot = p_double_edu,
       width = 10, height = 10, dpi = 300, units = "in", limitsize = FALSE)
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_text()`).
Removed 1 row containing missing values or values outside the scale range
(`geom_text()`).
Figure 9: County-level comparison of Education Index and Voter Participation (2020)

Figure 9

library(ggplot2)
library(dplyr)
library(sf)
library(tigris)
library(scales)
library(plotly)

# --- Load Texas county geometries ---
options(tigris_use_cache = TRUE)
tx_counties <- counties(state = "TX", cb = TRUE, year = 2020)

# --- Prepare for join ---
tx_counties <- tx_counties %>%
  mutate(County = toupper(NAME))

turnout <- turnout %>%
  mutate(County = toupper(County))

# --- Join with composite civic data ---
tx_voteedu <- left_join(tx_counties, turnout, by = "County")

# --- Compute quintiles for the composite variable ---
tx_voteedu <- tx_voteedu %>%
  mutate(vote_edu_q = ntile(vote_edu, 5))

# --- Build ggplot choropleth ---
p_voteedu <- ggplot(tx_voteedu) +
  geom_sf(aes(fill = vote_edu_q,
              text = paste0(
                "<b>", County, " County</b><br>",
                "Civic Vitality Index: ", sprintf("%.2f", vote_edu), "<br>",
                "Education Index: ", sprintf("%.2f", edu_index), "<br>",
                "Voter Turnout: ", sprintf("%.2f", Turnout_Rate)
              )),
          color = border_col, linewidth = 0.2) +
  scale_fill_gradientn(
    colors = rev(brewer_pal(palette = "Blues")(5)),  # light = high, dark = low
    name = NULL,
    limits = c(1, 5),
    breaks = c(1, 5),
    labels = c("Highest Civic Vitality", "Lowest Civic Vitality"),
    guide = guide_colorbar(
      barheight = unit(4, "cm"),
      barwidth  = unit(0.5, "cm"),
      ticks = FALSE,
      label.position = "right",
      title = NULL,
      label.theme = element_text(family = "sans", color = text_col, size = 9)
    )
  ) +
  theme_void() +
  theme(
    legend.position = "right",
    legend.justification = c(0.5, 0.5),
    plot.title = element_text(family = "sans", face = "bold",
                              size = 16, hjust = 0.5, color = text_col),
    plot.margin = margin(30, 30, 30, 30)
  ) +
  ggtitle("Composite Civic Vitality (Voting + Education) by County (Texas, 2020)")
Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
: Ignoring unknown aesthetics: text
# --- Convert to interactive Plotly map ---
p_voteedu_plotly <- ggplotly(p_voteedu, tooltip = "text") %>%
  style(hoverlabel = list(bgcolor = "white",
                          font = list(family = "sans", color = "#333333")))

# --- Display interactive map inline ---
p_voteedu_plotly
Figure 10: Composite Civic Vitality (Voting + Education) by County in Texas (2020)
# --- Export static version if needed ---
ggsave("vote_edu_choropleth.png", plot = p_voteedu,
       width = 9, height = 8, dpi = 300, units = "in", limitsize = FALSE)

Figure 10

library(dplyr)
library(knitr)

# --- Top 10 counties ---
top10_vitality <- turnout %>%
  arrange(desc(vote_edu)) %>%
  slice_head(n = 10) %>%
  select(County, vote_edu, edu_index, Turnout_Rate) %>%
  mutate(across(c(vote_edu, edu_index, Turnout_Rate), round, 3))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `across(c(vote_edu, edu_index, Turnout_Rate), round, 3)`.
Caused by warning:
! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
Supply arguments directly to `.fns` through an anonymous function instead.

  # Previously
  across(a:b, mean, na.rm = TRUE)

  # Now
  across(a:b, \(x) mean(x, na.rm = TRUE))
kable(top10_vitality,
      caption = "Top 10 Counties by Civic Vitality Index (2020)",
      col.names = c("County", "Civic Vitality", "Education Index", "Turnout Rate"))
Table 2: Top 10 Counties by Civic Vitality Index (2020)
Top 10 Counties by Civic Vitality Index (2020)
County Civic Vitality Education Index Turnout Rate
ROBERTS 0.719 0.629 80.882
LLANO 0.708 0.657 75.863
WILLIAMSON 0.708 0.646 76.872
ARMSTRONG 0.693 0.644 74.232
TERRELL 0.692 0.703 68.053
MOTLEY 0.691 0.622 75.902
CALLAHAN 0.683 0.667 69.907
ARCHER 0.680 0.627 73.356
ROBERTSON 0.678 0.672 68.381
JOHNSON 0.671 0.659 68.218
# --- Bottom 10 counties ---
bottom10_vitality <- turnout %>%
  arrange(vote_edu) %>%
  slice_head(n = 10) %>%
  select(County, vote_edu, edu_index, Turnout_Rate) %>%
  mutate(across(c(vote_edu, edu_index, Turnout_Rate), round, 3))

kable(bottom10_vitality,
      caption = "Bottom 10 Counties by Civic Vitality Index (2020)",
      col.names = c("County", "Civic Vitality", "Education Index", "Turnout Rate"))
Table 3: Bottom 10 Counties by Civic Vitality Index (2020)
Bottom 10 Counties by Civic Vitality Index (2020)
County Civic Vitality Education Index Turnout Rate
ZAPATA 0.386 0.303 46.918
MAVERICK 0.407 0.350 46.433
BAILEY 0.424 0.322 52.557
DEAF SMITH 0.439 0.360 51.798
TITUS 0.444 0.291 59.668
DAWSON 0.448 0.363 53.336
LYNN 0.449 0.328 56.927
ZAVALA 0.455 0.367 54.290
CASTRO 0.455 0.370 54.062
SAN PATRICIO 0.457 0.315 59.866
# --- 5 major Texas metros ---
metro_counties <- c("TRAVIS", "DALLAS", "HARRIS", "BEXAR", "TARRANT")

metro_vitality <- turnout %>%
  filter(County %in% metro_counties) %>%
  select(County, vote_edu, edu_index, Turnout_Rate) %>%
  mutate(across(c(vote_edu, edu_index, Turnout_Rate), round, 3)) %>%
  arrange(desc(vote_edu))

kable(metro_vitality,
      caption = "Major Metro Area Counties: Civic Vitality Index (2020)",
      col.names = c("County", "Civic Vitality", "Education Index", "Turnout Rate"))
Table 4: Major Metro Area Counties: Civic Vitality Index (2020)
Major Metro Area Counties: Civic Vitality Index (2020)
County Civic Vitality Education Index Turnout Rate
TARRANT 0.624 0.560 68.840
HARRIS 0.620 0.578 66.148
TRAVIS 0.525 0.338 71.214
BEXAR 0.502 0.356 64.809
DALLAS 0.483 0.309 65.751

Health Index

library(sf)
library(tigris)
library(scales)

options(tigris_use_cache = TRUE)

# Prepare county geometries
tx_counties <- counties(state = "TX", cb = TRUE, year = 2020) %>%
  mutate(County = toupper(NAME))

# Align join key
turnout_map <- turnout %>%
  mutate(County = toupper(County))

# Join data to geometry
tx_health <- left_join(tx_counties, turnout_map, by = "County")

# Quintiles of health_index (higher = worse health)
tx_health <- tx_health %>%
  mutate(health_q = ntile(health_index, 5))

# Map (dark = worse health, light = better)
p_health <- ggplot(tx_health) +
  geom_sf(aes(fill = health_q,
              text = paste0(
                "<b>", County, " County</b><br>",
                "Health Index (0–1): ", sprintf("%.2f", health_index)
              )),
          color = border_col, linewidth = 0.2) +
  scale_fill_gradientn(
    colors = brewer_pal(palette = "Reds")(5),
    name = NULL,
    breaks = c(1, 5),
    labels = c("Lowest Health Burden", "Highest Health Burden"),
    guide = guide_colorbar(
      barheight = unit(4, "cm"),
      barwidth  = unit(0.5, "cm"),
      ticks = FALSE,
      label.position = "right",
      title = NULL,
      label.theme = element_text(family = "sans", color = text_col, size = 9)
    )
  ) +
  theme_void() +
  theme(
    legend.position = "right",
    legend.justification = c(0.5, 0.5),
    plot.title = element_text(
      family = "sans", face = "bold",
      size = 16, hjust = 0.5, color = text_col
    ),
    plot.margin = margin(30, 30, 30, 30)
  ) +
  ggtitle("Health Burden Index by County (Texas, 2020)")
Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
: Ignoring unknown aesthetics: text
p_health
Figure 11: Health Burden Index by County in Texas (higher values = worse health and access)
library(dplyr)
library(ggplot2)

bar_data_hv <- turnout %>%
  mutate(
    health_score = 1 - health_index  # higher = better health
  ) %>%
  arrange(desc(Turnout_Rate)) %>%
  mutate(County = factor(County, levels = rev(County)))  # top = highest turnout

ggplot(bar_data_hv) +
  # Left side: voter participation (%)
  geom_bar(aes(x = -Turnout_Rate, y = County),
           stat = "identity", fill = line_col, alpha = 0.8, width = 0.8) +
  # Right side: health score (0–1 scaled to %)
  geom_bar(aes(x = health_score * 100, y = County),
           stat = "identity", fill = alt_line, alpha = 0.8, width = 0.8) +
  geom_vline(xintercept = 0, color = border_col, linewidth = 0.8) +
  scale_x_continuous(
    name = NULL,
    limits = c(-max(bar_data_hv$Turnout_Rate, na.rm = TRUE),
                max(bar_data_hv$health_score, na.rm = TRUE) * 100),
    breaks = seq(-100, 100, by = 25),
    labels = function(x) abs(x)
  ) +
  labs(
    y = NULL,
    title = "Voting Participation vs. Health Score by County (Texas, 2020)",
    subtitle = "Counties sorted by Voter Participation (Highest → Lowest)"
  ) +
  theme_minimal(base_family = "sans") +
  theme(
    plot.background = element_rect(fill = fill_col, color = NA),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(color = text_col, size = 10),
    axis.title.x = element_text(color = text_col, face = "bold"),
    plot.title = element_text(color = text_col, face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(color = text_col, size = 12, hjust = 0.5)
  ) +
  annotate("text", x = -90, y = nrow(bar_data_hv) + 2,
           label = "Voter Turnout (%)", color = line_col,
           family = "sans", fontface = "bold", size = 4.2) +
  annotate("text", x = 90, y = nrow(bar_data_hv) + 2,
           label = "Health Score (better health → %)", color = alt_text,
           family = "sans", fontface = "bold", size = 4.2)
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_text()`).
Figure 12: County-level comparison of Voter Participation and Health Score (2020)
bar_data_he <- turnout %>%
  mutate(
    health_score = 1 - health_index
  ) %>%
  arrange(desc(edu_index)) %>%
  mutate(County = factor(County, levels = rev(County)))  # top = highest education

ggplot(bar_data_he) +
  # Left: Education Index
  geom_bar(aes(x = -edu_index * 100, y = County),
           stat = "identity", fill = line_col, alpha = 0.8, width = 0.8) +
  # Right: Health Score
  geom_bar(aes(x = health_score * 100, y = County),
           stat = "identity", fill = alt_line, alpha = 0.8, width = 0.8) +
  geom_vline(xintercept = 0, color = border_col, linewidth = 0.8) +
  scale_x_continuous(
    name = NULL,
    limits = c(-max(bar_data_he$edu_index, na.rm = TRUE) * 100,
                max(bar_data_he$health_score, na.rm = TRUE) * 100),
    breaks = seq(-100, 100, by = 25),
    labels = function(x) abs(x)
  ) +
  labs(
    y = NULL,
    title = "Education Index vs. Health Score by County (Texas, 2020)",
    subtitle = "Counties sorted by Education Index (Highest → Lowest)"
  ) +
  theme_minimal(base_family = "sans") +
  theme(
    plot.background = element_rect(fill = fill_col, color = NA),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(color = text_col, size = 10),
    axis.title.x = element_text(color = text_col, face = "bold"),
    plot.title = element_text(color = text_col, face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(color = text_col, size = 12, hjust = 0.5)
  ) +
  annotate("text", x = -90, y = nrow(bar_data_he) + 2,
           label = "Education Index (0–1 → %)", color = line_col,
           family = "sans", fontface = "bold", size = 4.2) +
  annotate("text", x = 90, y = nrow(bar_data_he) + 2,
           label = "Health Score (better health → %)", color = alt_text,
           family = "sans", fontface = "bold", size = 4.2)
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_text()`).
Figure 13: County-level comparison of Education Index and Health Score (2020)
library(sf)
library(tigris)
library(scales)

options(tigris_use_cache = TRUE)

tx_counties <- counties(state = "TX", cb = TRUE, year = 2020) %>%
  mutate(County = toupper(NAME))

turnout_map <- turnout %>%
  mutate(
    County = toupper(County),
    health_score = 1 - health_index,
    vote_health = (Turnout_Rate/100 + health_score) / 2
  )

tx_votehealth <- left_join(tx_counties, turnout_map, by = "County") %>%
  mutate(vote_health_q = ntile(vote_health, 5))

p_votehealth <- ggplot(tx_votehealth) +
  geom_sf(aes(fill = vote_health_q,
              text = paste0(
                "<b>", County, " County</b><br>",
                "Voting + Health Index: ", sprintf("%.2f", vote_health), "<br>",
                "Turnout Rate: ", sprintf("%.1f", Turnout_Rate), "%<br>",
                "Health Score: ", sprintf("%.2f", health_score)
              )),
          color = border_col, linewidth = 0.2) +
  scale_fill_gradientn(
    colors = rev(brewer_pal(palette = "Blues")(5)),
    name = NULL,
    breaks = c(1, 5),
    labels = c("Highest Voting + Health", "Lowest Voting + Health"),
    guide = guide_colorbar(
      barheight = unit(4, "cm"),
      barwidth  = unit(0.5, "cm"),
      ticks = FALSE,
      label.position = "right",
      title = NULL,
      label.theme = element_text(family = "sans", color = text_col, size = 9)
    )
  ) +
  theme_void() +
  theme(
    legend.position = "right",
    legend.justification = c(0.5, 0.5),
    plot.title = element_text(
      family = "sans", face = "bold",
      size = 16, hjust = 0.5, color = text_col
    ),
    plot.margin = margin(30, 30, 30, 30)
  ) +
  ggtitle("Composite Voting + Health Index by County (Texas, 2020)")
Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
: Ignoring unknown aesthetics: text
p_votehealth
Figure 14: Composite Index of Voter Turnout and Health by County in Texas (2020)

Summary Statistics

Table 5
library(dplyr)

civic_stats <- turnout %>%
  select(Turnout_Rate, edu_index, health_index, civic_index) %>%
  summarise(
    across(
      everything(),
      list(mean = ~mean(.x, na.rm = TRUE),
           sd   = ~sd(.x, na.rm = TRUE),
           min  = ~min(.x, na.rm = TRUE),
           max  = ~max(.x, na.rm = TRUE)),
      .names = "{.col}_{.fn}"
    )
  )

civic_stats
# A tibble: 1 × 16
  Turnout_Rate_mean Turnout_Rate_sd Turnout_Rate_min Turnout_Rate_max
              <dbl>           <dbl>            <dbl>            <dbl>
1              65.4            7.55             43.3             86.9
# ℹ 12 more variables: edu_index_mean <dbl>, edu_index_sd <dbl>,
#   edu_index_min <dbl>, edu_index_max <dbl>, health_index_mean <dbl>,
#   health_index_sd <dbl>, health_index_min <dbl>, health_index_max <dbl>,
#   civic_index_mean <dbl>, civic_index_sd <dbl>, civic_index_min <dbl>,
#   civic_index_max <dbl>
Table 6
civic_cor <- turnout %>%
  select(Turnout_Rate, edu_index, health_index, civic_index) %>%
  cor(use = "complete.obs")

civic_cor
             Turnout_Rate    edu_index health_index civic_index
Turnout_Rate  1.000000000 -0.002484174  -0.66996595   0.7675573
edu_index    -0.002484174  1.000000000  -0.00152774   0.4430431
health_index -0.669965949 -0.001527740   1.00000000  -0.8581813
civic_index   0.767557301  0.443043056  -0.85818135   1.0000000

Civic Index

options(tigris_use_cache = TRUE)

tx_counties <- counties(state = "TX", cb = TRUE, year = 2020) %>%
  mutate(County = toupper(NAME))

turnout_map <- turnout %>%
  mutate(County = toupper(County))

tx_civic <- left_join(tx_counties, turnout_map, by = "County") %>%
  mutate(civic_q = ntile(civic_index, 5))

p_civic <- ggplot(tx_civic) +
  geom_sf(aes(fill = civic_q,
              text = paste0(
                "<b>", County, " County</b><br>",
                "Civic Index: ", sprintf("%.2f", civic_index), "<br>",
                "Turnout: ", sprintf("%.1f", Turnout_Rate), "%<br>",
                "Education Index: ", sprintf("%.2f", edu_index), "<br>",
                "Health Score: ", sprintf("%.2f", 1 - health_index)
              )),
          color = border_col, linewidth = 0.2) +
  scale_fill_gradientn(
    colors = rev(brewer_pal(palette = "PuBu")(5)),
    name = NULL,
    breaks = c(1, 5),
    labels = c("Highest Civic Index", "Lowest Civic Index"),
    guide = guide_colorbar(
      barheight = unit(4, "cm"),
      barwidth  = unit(0.5, "cm"),
      ticks = FALSE,
      label.position = "right",
      title = NULL,
      label.theme = element_text(family = "sans", color = text_col, size = 9)
    )
  ) +
  theme_void() +
  theme(
    legend.position = "right",
    legend.justification = c(0.5, 0.5),
    plot.title = element_text(
      family = "sans", face = "bold",
      size = 16, hjust = 0.5, color = text_col
    ),
    plot.margin = margin(30, 30, 30, 30)
  ) +
  ggtitle("Civic Triangle Index by County (Texas, 2020)")
Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
: Ignoring unknown aesthetics: text
p_civic
Figure 15: Civic Triangle Index (Voting + Education + Health) by County in Texas (2020)

Linear Regression

# Basic linear model
model_turnout <- lm(Turnout_Rate ~ edu_index + health_index, data = turnout)

summary(model_turnout)

Call:
lm(formula = Turnout_Rate ~ edu_index + health_index, data = turnout)

Residuals:
     Min       1Q   Median       3Q      Max 
-15.6318  -2.8432   0.1367   3.1171  30.4775 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)    82.578      2.098  39.369   <2e-16 ***
edu_index      -0.279      3.727  -0.075     0.94    
health_index  -37.977      2.656 -14.298   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 5.626 on 251 degrees of freedom
Multiple R-squared:  0.4489,    Adjusted R-squared:  0.4445 
F-statistic: 102.2 on 2 and 251 DF,  p-value: < 2.2e-16
library(car)
Loading required package: carData

Attaching package: 'car'
The following object is masked from 'package:dplyr':

    recode
# Ensure the model exists
model_turnout <- lm(Turnout_Rate ~ edu_index + health_index, data = turnout)

par(mfrow = c(1, 2))  # Two side-by-side plots

avPlots(
  model_turnout,
  ask = FALSE,
  id.n = 0,            # Do not label counties
  pch = 19,
  col = line_col,
  lwd = 2,
  main = c(
    "Turnout vs. Education (controlling for Health)",
    "Turnout vs. Health (controlling for Education)"
  )
)
Warning in plot.window(...): "id.n" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "id.n" is not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "id.n" is not a
graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "id.n" is not a
graphical parameter
Warning in box(...): "id.n" is not a graphical parameter
Warning in title(...): "id.n" is not a graphical parameter
Warning in plot.xy(xy.coords(x, y), type = type, ...): "id.n" is not a
graphical parameter
Warning in plot.window(...): "id.n" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "id.n" is not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "id.n" is not a
graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "id.n" is not a
graphical parameter
Warning in box(...): "id.n" is not a graphical parameter
Warning in title(...): "id.n" is not a graphical parameter
Warning in plot.xy(xy.coords(x, y), type = type, ...): "id.n" is not a
graphical parameter
Figure 16: Partial Regression (Added-Variable) Plots for Turnout Model
turnout <- turnout %>%
  mutate(
    model_residual = residuals(model_turnout),
    model_fitted   = fitted(model_turnout)
  )
library(sf)
library(tigris)
library(ggplot2)
library(scales)

options(tigris_use_cache = TRUE)

# --- Load Texas county geometries ---
tx_counties <- counties(state = "TX", cb = TRUE, year = 2020) %>%
  mutate(County = toupper(NAME))

# --- Join turnout + residuals ---
turnout_map <- turnout %>% mutate(County = toupper(County))

tx_resid <- left_join(tx_counties, turnout_map, by = "County") %>%
  mutate(
    resid_q = ntile(model_residual, 5),     # numeric 1–5
    resid_q = factor(resid_q)               # convert to factor (required!)
  )

# --- Colors for discrete factor ---
resid_colors <- c(
  "1" = "#a50026",   # Much Lower Than Expected (deep red)
  "2" = "#f46d43",   # Lower
  "3" = "#fdae61",   # Near Expected
  "4" = "#abd9e9",   # Higher
  "5" = "#3288bd"    # Much Higher (blue)
)

# --- Build map ---
ggplot(tx_resid) +
  geom_sf(
    aes(
      fill = resid_q,
      text = paste0(
        "<b>", County, " County</b><br>",
        "Residual: ", sprintf("%.2f", model_residual), "<br>",
        "Actual Turnout: ", sprintf("%.1f", Turnout_Rate), "%<br>",
        "Predicted Turnout: ", sprintf("%.1f", model_fitted), "%"
      )
    ),
    color = border_col, linewidth = 0.2
  ) +
  scale_fill_manual(
    values = resid_colors,
    name = "Turnout vs Expected",
    labels = c(
      "Much Lower",
      "Lower",
      "Near Expected",
      "Higher",
      "Much Higher"
    )
  ) +
  theme_void() +
  theme(
    legend.position = "right",
    plot.title = element_text(
      family = "sans", face = "bold",
      size = 16, hjust = 0.5, color = text_col
    )
  ) +
  ggtitle("Model Residual Map: Where Turnout Deviates From Expectations")
Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
: Ignoring unknown aesthetics: text
Figure 17: Turnout Model Residuals: Counties Voting Higher or Lower Than Expected
library(broom)

# Model 1: Turnout ~ Education
model_edu <- lm(Turnout_Rate ~ edu_index, data = turnout)

# Model 2: Turnout ~ Health
model_health <- lm(Turnout_Rate ~ health_index, data = turnout)

# Model 3: Turnout ~ Education + Health
model_turnout <- lm(Turnout_Rate ~ edu_index + health_index, data = turnout)

# Summaries (compact)
glance_list <- list(
  edu_only   = glance(model_edu),
  health_only = glance(model_health),
  both       = glance(model_turnout)
)

tidy_list <- list(
  edu_only   = tidy(model_edu),
  health_only = tidy(model_health),
  both       = tidy(model_turnout)
)

glance_list
$edu_only
# A tibble: 1 × 12
   r.squared adj.r.squared sigma statistic p.value    df logLik   AIC   BIC
       <dbl>         <dbl> <dbl>     <dbl>   <dbl> <dbl>  <dbl> <dbl> <dbl>
1 0.00000617      -0.00396  7.56   0.00156   0.969     1  -873. 1753. 1763.
# ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>

$health_only
# A tibble: 1 × 12
  r.squared adj.r.squared sigma statistic  p.value    df logLik   AIC   BIC
      <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>  <dbl> <dbl> <dbl>
1     0.449         0.447  5.61      205. 1.87e-34     1  -798. 1601. 1612.
# ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>

$both
# A tibble: 1 × 12
  r.squared adj.r.squared sigma statistic  p.value    df logLik   AIC   BIC
      <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>  <dbl> <dbl> <dbl>
1     0.449         0.444  5.63      102. 3.37e-33     2  -798. 1603. 1617.
# ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
tidy_list
$edu_only
# A tibble: 2 × 5
  term        estimate std.error statistic  p.value
  <chr>          <dbl>     <dbl>     <dbl>    <dbl>
1 (Intercept)   65.4        2.31   28.3    3.83e-80
2 edu_index     -0.198      5.01   -0.0394 9.69e- 1

$health_only
# A tibble: 2 × 5
  term         estimate std.error statistic   p.value
  <chr>           <dbl>     <dbl>     <dbl>     <dbl>
1 (Intercept)      82.5      1.24      66.3 1.79e-161
2 health_index    -38.0      2.65     -14.3 1.87e- 34

$both
# A tibble: 3 × 5
  term         estimate std.error statistic   p.value
  <chr>           <dbl>     <dbl>     <dbl>     <dbl>
1 (Intercept)    82.6        2.10   39.4    2.13e-109
2 edu_index      -0.279      3.73   -0.0749 9.40e-  1
3 health_index  -38.0        2.66  -14.3    2.52e- 34
summary(model_turnout)

Call:
lm(formula = Turnout_Rate ~ edu_index + health_index, data = turnout)

Residuals:
     Min       1Q   Median       3Q      Max 
-15.6318  -2.8432   0.1367   3.1171  30.4775 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)    82.578      2.098  39.369   <2e-16 ***
edu_index      -0.279      3.727  -0.075     0.94    
health_index  -37.977      2.656 -14.298   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 5.626 on 251 degrees of freedom
Multiple R-squared:  0.4489,    Adjusted R-squared:  0.4445 
F-statistic: 102.2 on 2 and 251 DF,  p-value: < 2.2e-16
# Ensure COG is a factor
turnout <- turnout %>%
  mutate(COG = factor(COG))

# Fixed-effects model with COG as factor
model_cog_fe <- lm(Turnout_Rate ~ edu_index + health_index + COG, data = turnout)

summary(model_cog_fe)

Call:
lm(formula = Turnout_Rate ~ edu_index + health_index + COG, data = turnout)

Residuals:
     Min       1Q   Median       3Q      Max 
-14.5563  -2.5863  -0.2794   2.3159  27.9864 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   81.0514     2.3515  34.467  < 2e-16 ***
edu_index     -1.1612     3.6134  -0.321  0.74823    
health_index -32.5492     2.9121 -11.177  < 2e-16 ***
COGACOG       -0.8917     1.9263  -0.463  0.64389    
COGBVRG        1.4606     2.9483   0.495  0.62078    
COGCAPCOG      1.7414     2.6721   0.652  0.51526    
COGCCRPC      -2.0393     3.2757  -0.623  0.53421    
COGCEN-TEX     1.3156     2.0255   0.650  0.51665    
COGCTCOG       1.9086     1.8515   1.031  0.30370    
COGDETCOG      2.0078     2.1654   0.927  0.35477    
COGETC         2.4636     3.9105   0.630  0.52932    
COGETCOG       2.4714     2.0830   1.186  0.23666    
COGGCRPC       0.6497     2.0353   0.319  0.74987    
COGH-GAC       0.3100     1.9138   0.162  0.87148    
COGLGRC       -7.1733     2.7036  -2.653  0.00853 ** 
COGNCTCOG      1.0312     2.1280   0.485  0.62842    
COGNORTEX     -0.2513     1.7192  -0.146  0.88392    
COGPBDC       -5.4367     1.8903  -2.876  0.00440 ** 
COGPHRC        0.5695     1.6174   0.352  0.72509    
COGRPCF      -15.1966     5.3918  -2.818  0.00525 ** 
COGSETRPC     -1.2585     3.2827  -0.383  0.70180    
COGSPPDC      -5.1572     1.6702  -3.088  0.00227 ** 
COGTML         0.2622     2.0268   0.129  0.89720    
COGWCTCOG      0.4445     1.7547   0.253  0.80024    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 5.227 on 230 degrees of freedom
Multiple R-squared:  0.5641,    Adjusted R-squared:  0.5205 
F-statistic: 12.94 on 23 and 230 DF,  p-value: < 2.2e-16
library(lme4)
Loading required package: Matrix
# Random intercept by COG
model_cog_re <- lmer(Turnout_Rate ~ edu_index + health_index + (1 | COG), data = turnout)

summary(model_cog_re)
Linear mixed model fit by REML ['lmerMod']
Formula: Turnout_Rate ~ edu_index + health_index + (1 | COG)
   Data: turnout

REML criterion at convergence: 1574

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.7682 -0.4744 -0.0051  0.5009  5.4247 

Random effects:
 Groups   Name        Variance Std.Dev.
 COG      (Intercept)  4.241   2.059   
 Residual             27.821   5.275   
Number of obs: 254, groups:  COG, 22

Fixed effects:
             Estimate Std. Error t value
(Intercept)    81.321      2.113  38.492
edu_index      -1.042      3.584  -0.291
health_index  -34.321      2.760 -12.436

Correlation of Fixed Effects:
            (Intr) ed_ndx
edu_index   -0.764       
health_indx -0.578 -0.007
library(sf)
library(tigris)
library(spdep)
Loading required package: spData
To access larger datasets in this package, install the spDataLarge
package with: `install.packages('spDataLarge',
repos='https://nowosad.github.io/drat/', type='source')`

Attaching package: 'spdep'
The following object is masked from 'package:fmsb':

    geary.test
options(tigris_use_cache = TRUE)

# --- 1. Build sf with residuals ---
tx_counties <- counties(state = "TX", cb = TRUE, year = 2020) %>%
  mutate(County = toupper(NAME))

turnout_resid <- turnout %>%
  mutate(
    County = toupper(County),
    model_residual = residuals(model_turnout)
  )

tx_sf <- left_join(tx_counties, turnout_resid, by = "County")

# --- 2. Create neighbor list (queen contiguity) ---
nb_queen <- poly2nb(tx_sf, queen = TRUE)
lw_queen <- nb2listw(nb_queen, style = "W", zero.policy = TRUE)

# --- 3. Global Moran's I on residuals ---
moran_res <- moran.test(tx_sf$model_residual, lw_queen, zero.policy = TRUE)

moran_res

    Moran I test under randomisation

data:  tx_sf$model_residual  
weights: lw_queen    

Moran I statistic standard deviate = 6.0416, p-value = 7.629e-10
alternative hypothesis: greater
sample estimates:
Moran I statistic       Expectation          Variance 
      0.222010804      -0.003952569       0.001398850 
library(sf)
library(spdep)
library(dplyr)
library(ggplot2)

# --- Compute local Moran's I ---
local_m <- localmoran(tx_sf$model_residual, lw_queen, zero.policy = TRUE)

# Convert to a data frame for naming
local_m <- as.data.frame(local_m)

# Determine which column is the p-value
# Try common names, fallback to the second column
p_col <- dplyr::first(intersect(
  c("Pr(z != E(Ii))", "Pr(z > 0)", "Pr(z < 0)", "p.value"),
  names(local_m)
))

if (is.na(p_col)) {
  # Fallback: use column 2 as p-value
  p_col <- names(local_m)[2]
}

# Add local Moran results to sf object
tx_sf <- tx_sf %>%
  mutate(
    local_I   = local_m[, 1],        # Ii statistic
    local_p   = local_m[, p_col],    # p-value, robust detection
    resid_std = scale(model_residual)[, 1]
  )

# --- Define cluster category ---
tx_sf <- tx_sf %>%
  mutate(
    cluster = case_when(
      resid_std > 0 & local_p < 0.05 ~ "High Cluster",
      resid_std < 0 & local_p < 0.05 ~ "Low Cluster",
      TRUE                          ~ "Not Significant"
    ),
    cluster = factor(cluster, levels = c("High Cluster", "Low Cluster", "Not Significant"))
  )

# --- Color palette ---
cluster_palette <- c(
  "High Cluster"   = "#d7191c",  # red
  "Low Cluster"    = "#2c7bb6",  # blue
  "Not Significant" = "#cccccc"  # grey
)

# --- Plot ---
ggplot(tx_sf) +
  geom_sf(
    aes(fill = cluster),
    color = border_col, linewidth = 0.2
  ) +
  scale_fill_manual(values = cluster_palette, name = "Local Cluster") +
  theme_void() +
  theme(
    legend.position = "right",
    plot.title = element_text(
      family = "sans", face = "bold", size = 16,
      hjust = 0.5, color = text_col
    )
  ) +
  ggtitle("Local Moran's I Clusters of Turnout Residuals")

Local Moran’s I Clusters of Turnout Residuals
{=html}
<div id="civic-dashboard" style="display:flex; gap:24px; flex-wrap:wrap;">
  <div id="tx-map" style="flex:1 1 45%; min-width:350px; height:500px;"></div>
  <div id="tx-scatter" style="flex:1 1 45%; min-width:350px; height:500px;"></div>
</div>

<script src="https://d3js.org/d3.v7.min.js"></script>
<script type="module">
// Use relative paths
const dataUrl = "data/tx_civic_data.csv";
const geoUrl = "data/tx_counties.geojson";  // Changed from topoUrl

// Load data
Promise.all([
  d3.csv(dataUrl, d3.autoType),
  d3.json(geoUrl)
]).then(([rows, geojson]) => {

  // Use GeoJSON directly (no topojson conversion needed)
  const counties = geojson;
  const rowByCounty = new Map(rows.map(d => [d.County, d]));
  // Join turnout to features
  counties.features.forEach(f => {
    const name = f.properties.NAME || f.id;
    const countyName = name.replace(/ County$/i, "");
    const rec = rowByCounty.get(d3.format(".0f")(NaN)) // dummy to keep parser happy
    const match = rowByCounty.get(countyName) || rowByCounty.get(countyName.toUpperCase()) || rowByCounty.get(countyName.toLowerCase());
    f.properties.County = countyName;
    f.properties.data = match || null;
  });

  // Color scale domain on turnout
  const turnoutVals = rows.map(d => d.Turnout_Rate).filter(d => !isNaN(d));
  palette.domain(d3.extent(turnoutVals));

  drawMap(counties, rows);
  drawScatter(rows);
});

// ----- DRAW MAP -----
function drawMap(geo, rows) {
  const container = d3.select("#tx-map");
  const width = container.node().clientWidth;
  const height = container.node().clientHeight;

  container.selectAll("*").remove(); // clear

  const svg = container.append("svg")
    .attr("width", width)
    .attr("height", height);

  const projection = d3.geoMercator()
    .fitSize([width, height], geo);

  const path = d3.geoPath().projection(projection);

  const tooltip = d3.select("body").append("div")
    .attr("class", "civic-tooltip")
    .style("position", "absolute")
    .style("pointer-events", "none")
    .style("background", "rgba(255,255,255,0.9)")
    .style("border", "1px solid #ccc")
    .style("padding", "6px 8px")
    .style("font-size", "12px")
    .style("border-radius", "4px")
    .style("display", "none");

  svg.append("g")
    .selectAll("path")
    .data(geo.features)
    .join("path")
      .attr("d", path)
      .attr("fill", d => {
        const rec = d.properties.data;
        return rec ? palette(rec.Turnout_Rate) : "#eee";
      })
      .attr("stroke", "#ffffff")
      .attr("stroke-width", 0.7)
      .on("mouseenter", (event, d) => {
        const rec = d.properties.data;
        if (!rec) return;
        highlightedCounty = rec.County;
        updateHighlight();
        tooltip
          .style("display", "block")
          .html(`<strong>${rec.County} County</strong><br>
                 Turnout: ${rec.Turnout_Rate.toFixed(1)}%<br>
                 Edu index: ${rec.edu_index.toFixed(2)}<br>
                 Health index: ${rec.health_index.toFixed(2)}`);
      })
      .on("mousemove", (event) => {
        tooltip
          .style("left", (event.pageX + 10) + "px")
          .style("top", (event.pageY + 10) + "px");
      })
      .on("mouseleave", () => {
        highlightedCounty = null;
        updateHighlight();
        tooltip.style("display", "none");
      });

  // Expose selection for scatter
  window.__civicMapSelection = {
    svg: svg,
    geo: geo,
    path: path
  };
}

// ----- DRAW SCATTER -----
function drawScatter(rows) {
  const container = d3.select("#tx-scatter");
  const width = container.node().clientWidth;
  const height = container.node().clientHeight;

  container.selectAll("*").remove();

  const margin = {top: 30, right: 30, bottom: 50, left: 55};
  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;

  const svg = container.append("svg")
    .attr("width", width)
    .attr("height", height);

  const g = svg.append("g")
    .attr("transform", `translate(${margin.left},${margin.top})`);

  const x = d3.scaleLinear()
    .domain(d3.extent(rows, d => d.health_index)).nice()
    .range([0, innerWidth]);

  const y = d3.scaleLinear()
    .domain(d3.extent(rows, d => d.Turnout_Rate)).nice()
    .range([innerHeight, 0]);

  const color = d3.scaleSequential(d3.interpolateBlues)
    .domain(d3.extent(rows, d => d.edu_index));

  g.append("g")
    .attr("transform", `translate(0,${innerHeight})`)
    .call(d3.axisBottom(x).ticks(6));

  g.append("g")
    .call(d3.axisLeft(y).ticks(6));

  g.append("text")
    .attr("x", innerWidth / 2)
    .attr("y", innerHeight + 40)
    .attr("text-anchor", "middle")
    .text("Health index (higher = worse)");

  g.append("text")
    .attr("x", -innerHeight / 2)
    .attr("y", -40)
    .attr("transform", "rotate(-90)")
    .attr("text-anchor", "middle")
    .text("Turnout rate (%)");

  const dots = g.selectAll("circle")
    .data(rows)
    .join("circle")
      .attr("cx", d => x(d.health_index))
      .attr("cy", d => y(d.Turnout_Rate))
      .attr("r", 4)
      .attr("fill", d => color(d.edu_index))
      .attr("opacity", 0.8)
      .attr("data-county", d => d.County);

  // Tooltip
  const tooltip = d3.select("body").append("div")
    .attr("class", "civic-tooltip")
    .style("position", "absolute")
    .style("pointer-events", "none")
    .style("background", "rgba(255,255,255,0.9)")
    .style("border", "1px solid #ccc")
    .style("padding", "6px 8px")
    .style("font-size", "12px")
    .style("border-radius", "4px")
    .style("display", "none");

  dots
    .on("mouseenter", (event, d) => {
      highlightedCounty = d.County;
      updateHighlight();
      tooltip
        .style("display", "block")
        .html(`<strong>${d.County} County</strong><br>
               Turnout: ${d.Turnout_Rate.toFixed(1)}%<br>
               Edu index: ${d.edu_index.toFixed(2)}<br>
               Health index: ${d.health_index.toFixed(2)}`);
    })
    .on("mousemove", (event) => {
      tooltip
        .style("left", (event.pageX + 10) + "px")
        .style("top", (event.pageY + 10) + "px");
    })
    .on("mouseleave", () => {
      highlightedCounty = null;
      updateHighlight();
      tooltip.style("display", "none");
    });

  window.__civicScatterSelection = {svg, dots};
}

// ----- SHARED HIGHLIGHT -----
function updateHighlight() {
  const scatter = window.__civicScatterSelection;
  const mapSel = window.__civicMapSelection;

  if (scatter) {
    scatter.dots
      .attr("stroke", d => highlightedCounty === d.County ? "#000" : "none")
      .attr("stroke-width", d => highlightedCounty === d.County ? 1.5 : 0)
      .attr("r", d => highlightedCounty === d.County ? 6 : 4);
  }

  if (mapSel) {
    mapSel.svg.selectAll("path")
      .attr("stroke-width", d => {
        const rec = d.properties.data;
        return (rec && highlightedCounty === rec.County) ? 2.0 : 0.7;
      })
      .attr("stroke", d => {
        const rec = d.properties.data;
        return (rec && highlightedCounty === rec.County) ? "#000" : "#ffffff";
      });
  }
}
</script>


{=html}
<div style="margin-top:2rem;">
  <label>Metric: 
    <select id="rank-metric">
      <option value="Turnout_Rate">Turnout (%)</option>
      <option value="edu_index">Education index</option>
      <option value="health_index">Health index</option>
      <option value="vote_edu">Civic index</option>
    </select>
  </label>
  <label style="margin-left:1rem;">COG filter:
    <select id="rank-cog">
      <option value="ALL">All COGs</option>
    </select>
  </label>
</div>
<div id="rank-bars" style="width:100%; height:600px;"></div>

<script type="module">
import * as d3mod from "https://cdn.jsdelivr.net/npm/d3@7/+esm";
const d3 = d3mod;

d3.csv("data/tx_civic_data.csv", d3.autoType).then(rows => {
  const cogSet = Array.from(new Set(rows.map(d => d.COG))).sort();
  const cogSelect = d3.select("#rank-cog");
  cogSet.forEach(c => cogSelect.append("option").attr("value", c).text(c));

  const metricSelect = d3.select("#rank-metric");
  metricSelect.on("change", render);
  cogSelect.on("change", render);

  function render() {
    const metric = metricSelect.node().value;
    const cogVal = cogSelect.node().value;
    let data = rows;
    if (cogVal !== "ALL") {
      data = data.filter(d => d.COG === cogVal);
    }
    data = data.slice().sort((a, b) => d3.descending(a[metric], b[metric])).slice(0, 30);

    const container = d3.select("#rank-bars");
    const width = container.node().clientWidth;
    const height = container.node().clientHeight;
    container.selectAll("*").remove();

    const margin = {top: 30, right: 40, bottom: 40, left: 140};
    const innerWidth = width - margin.left - margin.right;
    const innerHeight = height - margin.top - margin.bottom;

    const svg = container.append("svg")
      .attr("width", width)
      .attr("height", height);

    const g = svg.append("g")
      .attr("transform", `translate(${margin.left},${margin.top})`);

    const y = d3.scaleBand()
      .domain(data.map(d => d.County))
      .range([0, innerHeight])
      .padding(0.15);

    const x = d3.scaleLinear()
      .domain([0, d3.max(data, d => d[metric])]).nice()
      .range([0, innerWidth]);

    g.append("g")
      .call(d3.axisLeft(y).tickSize(0))
      .selectAll("text")
      .style("font-size", "10px");

    g.append("g")
      .attr("transform", `translate(0,${innerHeight})`)
      .call(d3.axisBottom(x).ticks(5));

    g.selectAll("rect")
      .data(data)
      .join("rect")
        .attr("y", d => y(d.County))
        .attr("height", y.bandwidth())
        .attr("x", 0)
        .attr("width", d => x(d[metric]))
        .attr("fill", metric === "Turnout_Rate" ? "#3a5f7d" :
                       metric === "edu_index"    ? "#536cae" :
                       metric === "health_index" ? "#d73a1f" : "#2f3b44");

    g.append("text")
      .attr("x", innerWidth / 2)
      .attr("y", -10)
      .attr("text-anchor", "middle")
      .style("font-weight", "bold")
      .text(`Top counties by ${metric}`);
  }

  render();
});
</script>


{=html}
<div id="civic-corr" style="width:600px; height:400px;"></div>

<script type="module">
import * as d3 from "https://cdn.jsdelivr.net/npm/d3@7/+esm";

d3.csv("data/tx_civic_data.csv", d3.autoType).then(rows => {
  const vars = ["Turnout_Rate", "edu_index", "health_index", "vote_edu"];

  function corr(x, y) {
    const n = x.length;
    const meanX = d3.mean(x);
    const meanY = d3.mean(y);
    const num = d3.sum(x.map((xi, i) => (xi - meanX) * (y[i] - meanY)));
    const den = Math.sqrt(
      d3.sum(x.map(xi => (xi - meanX) ** 2)) *
      d3.sum(y.map(yi => (yi - meanY) ** 2))
    );
    return num / den;
  }

  const data = [];
  vars.forEach((vx, i) => {
    vars.forEach((vy, j) => {
      const x = rows.map(r => r[vx]);
      const y = rows.map(r => r[vy]);
      data.push({x: vx, y: vy, r: corr(x, y)});
    });
  });

  const container = d3.select("#civic-corr");
  const width = container.node().clientWidth;
  const height = container.node().clientHeight;
  const margin = {top: 40, right: 20, bottom: 40, left: 60};
  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;

  const svg = container.append("svg")
    .attr("width", width)
    .attr("height", height);

  const xScale = d3.scaleBand().domain(vars).range([0, innerWidth]).padding(0.05);
  const yScale = d3.scaleBand().domain(vars).range([0, innerHeight]).padding(0.05);

  const color = d3.scaleSequential(d3.interpolateRdBu).domain([1, -1]);

  const g = svg.append("g")
    .attr("transform", `translate(${margin.left},${margin.top})`);

  g.selectAll("rect")
    .data(data)
    .join("rect")
      .attr("x", d => xScale(d.x))
      .attr("y", d => yScale(d.y))
      .attr("width", xScale.bandwidth())
      .attr("height", yScale.bandwidth())
      .attr("fill", d => color(d.r));

  g.append("g")
    .attr("transform", `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale));

  g.append("g")
    .call(d3.axisLeft(yScale));

  svg.append("text")
    .attr("x", width / 2)
    .attr("y", 20)
    .attr("text-anchor", "middle")
    .style("font-weight", "bold")
    .text("Correlation among Civic Variables");
});
</script>


# Turnout_Rate is %; edu_index and health_index are 0–1
model <- lm(Turnout_Rate ~ edu_index + health_index, data = turnout)

turnout <- turnout %>%
  mutate(
    Predicted_Turnout = predict(model),
    County = stringr::str_to_title(County)
  )
library(tidyr)

Attaching package: 'tidyr'
The following objects are masked from 'package:Matrix':

    expand, pack, unpack
library(dplyr)

slope_data <- turnout %>%
  select(County, Turnout_Rate, Predicted_Turnout) %>%
  pivot_longer(
    cols = c(Turnout_Rate, Predicted_Turnout),
    names_to = "Type",
    values_to = "Value"
  ) %>%
  mutate(
    Type = factor(Type, levels = c("Predicted_Turnout", "Turnout_Rate")),
    Label = ifelse(Type == "Turnout_Rate", "Actual", "Predicted")
  )
library(ggplot2)

# compute error for ordering/emphasis
slope_order <- turnout %>%
  mutate(Error = Turnout_Rate - Predicted_Turnout) %>%
  arrange(Error) %>%
  pull(County)

slope_data$County <- factor(slope_data$County, levels = slope_order)

# highlight counties with largest deviation
turnout <- turnout %>%
  mutate(
    Error = Turnout_Rate - Predicted_Turnout,
    Highlight = abs(Error) > quantile(abs(Error), 0.85)   # top 15% outliers
  )

highlight_counties <- turnout$County[turnout$Highlight]

ggplot(slope_data, aes(x = Type, y = Value, group = County)) +
  geom_line(
    aes(color = County %in% highlight_counties),
    linewidth = 0.9,
    alpha = 0.9
  ) +
  geom_text(
    data = slope_data %>% filter(Type == "Predicted_Turnout"),
    aes(label = paste0(County, "  ", round(Value, 1), "%")),
    hjust = 1.05,
    size = 3,
    color = "#3a5f7d"
  ) +
  geom_text(
    data = slope_data %>% filter(Type == "Turnout_Rate"),
    aes(label = paste0(round(Value, 1), "%  ", County)),
    hjust = -0.05,
    size = 3,
    color = "#536cae"
  ) +
  scale_color_manual(values = c("FALSE" = "#c6c6c6", "TRUE" = "#d73a1f"), guide = "none") +
  scale_x_discrete(labels = c("Predicted_Turnout" = "Predicted", "Turnout_Rate" = "Actual")) +
  theme_minimal(base_family = "sans") +
  theme(
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    panel.grid = element_blank(),
    plot.title = element_text(size = 18, face = "bold", hjust = 0.5),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    plot.margin = margin(20, 20, 20, 20)
  ) +
  ggtitle("Actual vs. Predicted Voter Turnout (Texas Counties)")
Figure 18: Actual vs. Predicted Voter Turnout (Slopegraph)
library(dplyr)

# Determine 10 largest counties by registered voters
top10 <- turnout %>%
  arrange(desc(Registered_Voters)) %>%
  slice_head(n = 10) %>%
  mutate(County = stringr::str_to_title(County))

# Fit the model on ALL counties (recommended), but predict only for the top 10
model <- lm(Turnout_Rate ~ edu_index + health_index, data = turnout)

top10 <- top10 %>%
  mutate(
    Predicted_Turnout = predict(model, newdata = top10)
  )
library(tidyr)

slope_data <- top10 %>%
  select(County, Turnout_Rate, Predicted_Turnout) %>%
  pivot_longer(
    cols = c(Turnout_Rate, Predicted_Turnout),
    names_to = "Type",
    values_to = "Value"
  ) %>%
  mutate(
    Type = factor(Type, levels = c("Predicted_Turnout", "Turnout_Rate")),
    Label = ifelse(Type == "Turnout_Rate", "Actual", "Predicted")
  )
library(ggplot2)

# Order counties by prediction error
error_order <- top10 %>%
  mutate(Error = Turnout_Rate - Predicted_Turnout) %>%
  arrange(Error) %>%
  pull(County)

slope_data$County <- factor(slope_data$County, levels = error_order)

# Highlight counties with the largest deviation
top10 <- top10 %>%
  mutate(
    Error = Turnout_Rate - Predicted_Turnout,
    Highlight = abs(Error) > quantile(abs(Error), 0.75)  # top quartile
  )

highlight_counties <- top10$County[top10$Highlight]

ggplot(slope_data, aes(x = Type, y = Value, group = County)) +
  geom_line(
    aes(color = County %in% highlight_counties),
    linewidth = 1.2,
    alpha = 0.9
  ) +
  geom_point(aes(color = County %in% highlight_counties), size = 2) +
  geom_text(
    data = slope_data %>% filter(Type == "Predicted_Turnout"),
    aes(label = paste0(County, "  ", round(Value, 1), "%")),
    hjust = 1.1,
    size = 3.6,
    color = "#3a5f7d"
  ) +
  geom_text(
    data = slope_data %>% filter(Type == "Turnout_Rate"),
    aes(label = paste0(round(Value, 1), "%  ", County)),
    hjust = -0.1,
    size = 3.6,
    color = "#536cae"
  ) +
  scale_color_manual(values = c("FALSE" = "#bfbfbf", "TRUE" = "#d73a1f"), guide = "none") +
  scale_x_discrete(labels = c("Predicted_Turnout" = "Predicted", "Turnout_Rate" = "Actual")) +
  theme_minimal(base_family = "sans") +
  theme(
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    panel.grid = element_blank(),
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    plot.margin = margin(20, 20, 20, 20)
  ) +
  ggtitle("Actual vs. Predicted Voter Turnout (Top 10 Largest Texas Counties)")
Figure 19: Actual vs. Predicted Voter Turnout (Top 10 Texas Counties)

Notes

Certain elements of this preparation were enhanced with an LLM including but not limited to code restructuring, commenting, and information layout.